# @Time    : 2025-04-11 18:15
# @Author: Fioman
# @Phone  : 13149920693
# @Tips      : Talk is cheap,show me the code ^_^
import re

import requests

from FirstLesson import crawler_config

# Request headers: the User-Agent string is read from the shared crawler config.
headers = {
    "User-Agent": crawler_config.userAgent,
}

url = "https://www.dytt8899.com"
# requests raises an SSL error over https when a site's certificate does not
# validate, so certificate verification is switched off with verify=False.
# The headers dict is passed explicitly so the configured User-Agent is
# actually sent, and a timeout keeps a stalled server from blocking the
# script forever.
resp = requests.get(url, headers=headers, verify=False, timeout=10)
resp.encoding = "gb2312"  # decode the page source as gb2312
print(resp.text)

# Pull the link list out of the fetched page.
# obj1: starting at the section heading text, skip lazily to the first <ul>
# and capture everything up to the next </ul> as the named group "ul".
# re.S lets "." match newlines so the match can span multiple lines.
obj1 = re.compile(r"2025必看热片.*?<ul>(?P<ul>.*?)</ul>", re.S)
# obj2: capture the single-quoted href value of each anchor as group "href".
obj2 = re.compile(r"<a href='(?P<href>.*?)'", re.S)

for section_match in obj1.finditer(resp.text):
    list_html = section_match.group("ul")
    # Extract the sub-page links contained in this list.
    for anchor_match in obj2.finditer(list_html):
        print(anchor_match.group("href"))
